In [1]:
import create_container_structures
In [2]:
import Department
In [3]:
import query
In [4]:
import sort_tokens_by_department
[('robust', 3), ('serve', 3), ('25', 3), ('transient', 3), ('crystal', 3), ('junction', 3), ('flow', 3), ('switching', 3), ('propagation', 3), ('dispersion', 3), ('magnetic', 3), ('three-phase', 3), ('single-phase', 3), ('electromagnetic', 3), ('drives', 3), ('dc-dc', 3), ('controlled', 3), ('operators', 3), ("'", 3), ('best', 3), ('weather', 3), ('solid', 3), ('market', 3), ('148', 3), ('161a', 3), ('schemes', 3), ('quantization', 3), ('microwave', 3), ('matter', 3), ('speaking', 3), ('bipolar', 4), ('field-effect', 4), ('amplifiers', 4), ('impulse', 4), ('transfer', 4), ('diodes', 4), ('tested', 4), ('junctions', 4), ('labs', 4), ('solar', 4), ('converters', 4), ('foundation', 4), ('autonomous', 4), ('sensor', 4), ('feedback', 4), ('discrete-time', 4), ('102', 4), ('holography', 4), ('fiber', 4), ('laser', 4), ('interfacing', 5), ('analog', 5), ("'s", 5), ('steady-state', 5), ('frequency', 5), ('filters', 5), ('conversion', 5), ('dc', 5), ('modulation', 5), ('35', 6), ('45', 6), ('65', 6), ('semiconductor', 6), ('energy', 6), ('operation', 6), ('107', 6), ('labview', 6), ('transistors', 7), ('signals', 8), ('electronics', 8), ('response', 8), ('grid', 8), ('communications', 9), ('optics', 15)]
[('polynomial', 3), ('1a/10a', 3), ('vectors', 3), ('derivatives', 3), ('sat', 3), ('10b', 3), ('uniform', 3), ('determinants', 3), ('affine', 3), ('derivative', 3), ('numbered', 3), ('10c', 3), ('analytic', 3), ('taylor', 3), ('algebra/multivariable', 3), ('well-prepared', 3), ('third', 3), ('putnam', 3), ('year', 3), ('factorization', 3), ('residue', 3), ('elliptic', 3), ('choose', 3), ('equation', 3), ('separation', 3), ('conditions', 3), ('dynamical', 3), ('convergence', 3), ('combinatorial', 3), ('interpolation', 3), ('compactness', 3), ('chosen', 3), ('274', 3), ('extra', 3), ('181b', 3), ('enumeration', 3), ('187', 3), ('topologies', 3), ('homotopy', 3), ('insurance', 3), ('polynomials', 4), ('rational', 4), ('trigonometric', 4), ('4c', 4), ('3c', 4), ('continuity', 4), ('limits', 4), ('integrals', 4), ('iii', 4), ('teachers', 4), ('103b', 4), ('two-quarter', 4), ('partitions', 4), ('heat', 4), ('boundary', 4), ('142a', 4), ('extremal', 4), ('multivariate', 4), ('logarithmic', 5), ('coordinates', 5), ('confidence', 5), ('intervals', 5), ('differentiation', 5), ('rings', 5), ('100b', 5), ('diophantine', 5), ('142b', 5), ('highly', 6), ('qualifying', 6), ('higher', 6), ('proof', 6), ('further', 6), ('140b', 6), ('exponential', 7), ('approximation', 7), ('topology', 7), ('variable', 8), ('ab', 8), ('integral', 8), ('bc', 8), ('formula', 8), ('ordinary', 8), ('three-quarter', 8), ('algebraic', 9), ('spaces', 11), ('ap', 12), ('previously', 13), ('31ch', 16), ('score', 21), ('differential', 30), ('calculus', 32), ('if', 35), ('listed', 42)]
[('exploration', 3), ('corequisite', 3), ('operating', 3), ('file', 3), ('consult', 3), ('advice', 3), ('page', 3), ('choosing', 3), ('automata', 3), ('166', 3), ('protocol', 3), ('care', 3), ('networked', 3), ('123', 3), ('124', 3), ('140l', 3), ('stereo', 3), ('gpu', 3), ('health', 3), ('alignment', 3), ('chem', 3), ('be28', 3), ('bi34', 3), ('ch37', 3), ('rome', 4), ('assistance', 4), ('analytics', 4), ('prediction', 4), ('protein', 4), ('genomics', 4), ('unix', 5), ('ds25', 5), ('rendering', 6), ('21', 7), ('40b', 7), ('databases', 7), ('80', 7), ('search', 7), ('15l', 9), ('181', 10), ('bimm', 10), ('java', 11), ('176', 11), ('sophomore', 13), ('junior', 14), ('cs28', 18), ('senior', 19), ('cs25', 33), ('cs26', 33), ('cs27', 36), ('restricted', 53)]
[('exploring', 3), ('brains', 3), ('findings', 3), ('neurobiology', 3), ('musical', 3), ('embodied', 3), ('101a', 3), ('abilities', 3), ('physiology', 3), ('thought', 3), ('action', 3), ('examined', 3), ('populations', 3), ('14b', 3), ('psyc', 3), ('adults', 3), ('gesture', 3), ('genes', 3), ('states', 3), ('see', 3), ('190b', 3), ('minds', 4), ('way', 4), ('humans', 4), ('understand', 4), ('neurological', 4), ('psychological', 4), ('neuroanatomy', 4), ('hds', 4), ('hdp', 4), ('hci', 4), ('role', 5), ('our', 5), ('culture', 5), ('psychology', 5), ('linguistics', 5), ('developmental', 5), ('mind', 5), ('evolutionary', 5), ('10', 5), ('studio', 5), ('107b', 5), ('evidence', 5), ('human-computer', 6), ('neuroscience', 6), ('children', 6), ('neurons', 6), ('17', 6), ('activity', 6), ('118b', 6), ('disorders', 6), ('examines', 7), ('behavioral', 7), ('107a', 9), ('?', 11), ('brain', 23), ('cognition', 26)]
In [5]:
import tokenizer
In [6]:
import generate_pie_plots_per_dept
In [1]:
import heatmap
In [1]:
import numpy as np
import re
import nltk
from graphviz import Digraph
import plotly.graph_objects as go
import pandas as pd
import plotly.express as px
import networkx as nx
In [2]:
class Department():
    """CAPE review rows and catalog descriptions for a single department.

    Attributes:
        name: label passed to the constructor.
        courses: 2-D string array built by ``loadCourses``. Columns are
            [course number, title, professor, quarter, file column 5 as a
            fraction, file column 6 as a fraction, hours per week,
            expected GPA, received GPA].
        evals: 2-D string array holding file columns 3 and 4 of every row.
        descriptions: (n, 3) object array built by ``loadDescription``:
            course number, title, array of lower-cased tokens.
        professors: unique values of the professor column (set by ``cleanData``).
        preq: empty dict; no method of this class writes to it.
        gpa_median, time_median: last results of ``Median_GPA`` / ``Median_Time``.
    """

    # class initialization
    def __init__(self, name):
        self.name = name
        self.courses,self.descriptions = None,None
        self.preq,self.professors = {},None
        self.evals = None
        self.gpa_median,self.time_median = None,None

    def stringPercentToFloat(self,percent):
        """Convert a percentage string such as ``'93.6%'`` to ``0.936``."""
        return round(float(percent[:-1]) / 100,3)

    @staticmethod
    def _parseGrade(cell):
        """Return the number inside the parentheses of ``cell``, or 0.0 for 'N/A'.

        The second run of non-parenthesis characters is taken as the number.
        Assumes cells look like ``'B+ (3.30)'`` -- TODO confirm against the data.
        """
        if cell == 'N/A':
            return 0.0
        return float(re.findall(r'[^()]+', cell)[1])

    def loadCourses(self,file):
        """Load the tab-separated CAPE file into ``self.courses`` and ``self.evals``.

        Args:
            file: path (or file object) accepted by ``np.loadtxt``.
        """
        # ndmin=2 keeps a one-row file two-dimensional so it is still
        # iterated row by row instead of cell by cell.
        table = np.loadtxt(file,dtype = 'str',delimiter = "\t",ndmin = 2)
        res,evals = [],[]
        for record in table:
            # Split on the first '-' only so a hyphen inside the title
            # does not cut the title short.
            course = record[1].split('-',1)
            res.append([
                course[0].strip(),
                course[1].strip(),
                record[0],
                record[2],
                self.stringPercentToFloat(record[5]),
                self.stringPercentToFloat(record[6]),
                float(record[7]),
                self._parseGrade(record[-2]),
                self._parseGrade(record[-1]),
            ])
            evals.append([record[3],record[4]])
        # Mixing str and float makes NumPy store every cell as a string,
        # which is why later code compares against '0.0'.
        self.courses = np.array(res)
        self.evals = np.array(evals)

    def loadDescription(self,filename):
        """Load course descriptions into ``self.descriptions``.

        Blank lines are dropped; the remaining lines are read in pairs: a
        header line split on '.' into course number and title, followed by a
        description line that is lower-cased and tokenized with NLTK.

        Args:
            filename: path of the description text file.
        """
        with open(filename,'r',errors='ignore') as file:
            lines = [line for line in file if line != '\n']
        rows = []
        for header,body in zip(lines[0::2],lines[1::2]):
            course = header.split('.')
            tokens = nltk.word_tokenize(body.lower().strip())
            rows.append([course[0].strip(),course[1].strip(),np.array(tokens)])
        # Token arrays differ in length, so the table must be an object
        # array; letting np.array() infer the shape of ragged rows is an
        # error on NumPy >= 1.24.
        table = np.empty((len(rows),3),dtype = object)
        for i,row in enumerate(rows):
            table[i,0],table[i,1],table[i,2] = row
        self.descriptions = table

    def _medianOfReported(self,column):
        """Median of ``self.courses[:, column]`` ignoring '0.0' placeholder cells."""
        values = self.courses[:,column]
        return np.median(values[values != '0.0'].astype(float))

    def Median_GPA(self):
        """Median received GPA over all reviews that report one.

        Returns:
            The median, which is also stored in ``self.gpa_median``.
        """
        self.gpa_median = self._medianOfReported(-1)
        return self.gpa_median

    def Median_Time(self):
        """Median time spent over all reviews that report a non-zero value.

        Returns:
            The median, which is also stored in ``self.time_median``.
        """
        self.time_median = self._medianOfReported(6)
        return self.time_median

    def cleanData(self):
        """Drop reviews without a matching description and fill missing GPAs.

        Keeps only rows whose course number appears in ``self.descriptions``,
        records the unique professors, refreshes both medians, and replaces
        every '0.0' received-GPA cell with the median GPA (as a string).
        """
        known = np.array(list(set(self.descriptions[:,0])))
        self.courses = self.courses[np.isin(self.courses[:,0],known)]
        self.professors = np.array((list(set(self.courses[:,2]))))
        # Locate the placeholders before computing the median; the median
        # itself already ignores them.
        missing = np.where(self.courses[:,-1] == '0.0')[0]
        median = self.Median_GPA()
        self.Median_Time()
        self.courses[missing,-1] = str(median)
In [3]:
# Build the ECE department: read the CAPE rows, read the catalog
# descriptions, then drop unmatched rows and fill in missing GPAs.
ece = Department('ECE')
ece.loadCourses('ECE_CAPE.txt')
ece.loadDescription('ECE_Description.txt')
ece.cleanData()
In [4]:
# Build the CSE department: read the CAPE rows, read the catalog
# descriptions, then drop unmatched rows and fill in missing GPAs.
cse = Department('CSE')
cse.loadCourses('CSE_CAPE.txt')
cse.loadDescription('CSE_Description.txt')
cse.cleanData()
In [5]:
# Build the MATH department: read the CAPE rows, read the catalog
# descriptions, then drop unmatched rows and fill in missing GPAs.
# NOTE(review): the name `math` would shadow the standard-library module
# if that module were ever imported in this notebook.
math = Department('MATH')
math.loadCourses('MATH_CAPE.txt')
math.loadDescription('MATH_Description.txt')
math.cleanData()
In [6]:
# Build the COGS department: read the CAPE rows, read the catalog
# descriptions, then drop unmatched rows and fill in missing GPAs.
cogs = Department('COGS')
cogs.loadCourses('COGS_CAPE.txt')
cogs.loadDescription('COGS_Description.txt')
cogs.cleanData()
In [7]:
def avg_gpa(gpa):
    """Average the non-zero entries of ``gpa``, rounded to three decimals.

    Given a set of CAPE reviews' values, compute their mean while treating
    0 as "not reported" and leaving it out of both the sum and the count.

    Args:
        gpa: array-like of numbers or numeric strings.

    Returns:
        The rounded mean of the non-zero entries, or 0.0 when there are none.
    """
    values = np.asarray(gpa).astype(float)
    reported = values != 0
    # Fall back to a divisor of 1 so an all-zero/empty input yields 0.0
    # instead of dividing by zero.
    count = np.count_nonzero(reported) or 1
    return round(np.sum(values,where=reported) / count,3)
In [8]:
def GPA_by_department(department):
    """Average received and expected GPA over all reviews of a department.

    Args:
        department: object whose ``courses`` array has the received GPA in
            the last column and the expected GPA in the second-to-last.

    Returns:
        Tuple ``(average received GPA, average expected GPA)``.
    """
    # np.float was removed from NumPy; the builtin float is the same dtype.
    actual_gpa = department.courses[:,-1].astype(float)
    expected_gpa = department.courses[:,-2].astype(float)
    return avg_gpa(actual_gpa),avg_gpa(expected_gpa)
In [9]:
def GPA_by_professor(department):
    """Rank a department's professors by average received GPA.

    Args:
        department: object with a ``professors`` iterable and a ``courses``
            array (professor in column 2, expected GPA second-to-last,
            received GPA last).

    Returns:
        String array with rows ``[professor, avg received GPA, avg expected
        GPA]`` sorted by received GPA, highest first. Shape (0, 3) when the
        department has no professors.
    """
    courses = department.courses
    res = []
    for p in department.professors:
        taught = courses[courses[:,2] == p]
        res.append([p,avg_gpa(taught[:,-1]),avg_gpa(taught[:,-2])])
    if not res:
        # Nothing to rank; column indexing below would fail on a 1-D array.
        return np.empty((0,3),dtype = str)
    res = np.array(res)
    index = np.argsort(res[:,1].astype(float))[::-1]
    return res[index]
In [10]:
def GPA_best_course(department):
    """Rank a department's courses by average received GPA.

    Args:
        department: object whose ``courses`` array has the course number in
            column 0, expected GPA second-to-last and received GPA last.

    Returns:
        String array with rows ``[course, avg received GPA, avg expected
        GPA]`` sorted by received GPA, highest first. Shape (0, 3) when
        there are no courses.
    """
    courses = department.courses
    res = []
    # np.unique gives each course once, in a reproducible order.
    for p in np.unique(courses[:,0]):
        reviews = courses[courses[:,0] == p]
        res.append([p,avg_gpa(reviews[:,-1]),avg_gpa(reviews[:,-2])])
    if not res:
        return np.empty((0,3),dtype = str)
    res = np.array(res)
    index = np.argsort(res[:,1].astype(float))[::-1]
    return res[index]
In [11]:
def GPA_best_time(department):
    """Rank a department's courses by average time spent.

    Args:
        department: object whose ``courses`` array has the course number in
            column 0 and the time spent in column 6.

    Returns:
        String array with rows ``[course, avg time]`` sorted by time,
        largest first. Shape (0, 2) when there are no courses.
    """
    courses = department.courses
    res = []
    for p in np.unique(courses[:,0]):
        reviews = courses[courses[:,0] == p]
        # avg_gpa is a generic "mean of the non-zero entries" helper.
        res.append([p,avg_gpa(reviews[:,6])])
    if not res:
        return np.empty((0,2),dtype = str)
    res = np.array(res)
    index = np.argsort(res[:,1].astype(float))[::-1]
    return res[index]
In [12]:
# Grouped bar chart: average received vs. expected GPA for each department.
names = ['ECE','CSE','COGS','MATH']
departments = [ece,cse,cogs,math]
actual_gpa_department,expected_gpa_department = [],[]
# Two separate colour names; previously a single malformed entry
# 'lightslategray,gray'. Not used by the chart below.
colors = ['lightslategray','gray']

layout = go.Layout(yaxis=dict(range=[2.5,4]))

for item in departments:
    res = GPA_by_department(item)
    actual_gpa_department.append(res[0])
    expected_gpa_department.append(res[1])

# One colour per trace; a scalar applies to every bar regardless of how
# many departments are plotted.
fig = go.Figure(data=[
    go.Bar(name='Actual GPA', x=names, y=actual_gpa_department, marker_color='skyblue', opacity=0.5),
    go.Bar(name='Expected GPA', x=names, y=expected_gpa_department, marker_color='blue', opacity=0.5),
],layout=layout)
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()
In [13]:
# First five rows of the ECE professor ranking (highest received GPA),
# shown as received vs. expected GPA side by side.
temp = GPA_by_professor(ece)
top_rows = temp[:5]
actual_gpa_prof = list(top_rows[:,1])
expected_gpa_prof = list(top_rows[:,2])

fig = go.Figure()
fig.add_trace(go.Bar(name='Actual GPA', x=list(top_rows[:,0]), y=actual_gpa_prof,
                     marker_color=['skyblue']*5, opacity=0.5))
fig.add_trace(go.Bar(name='Expected GPA', x=list(top_rows[:,0]), y=expected_gpa_prof,
                     marker_color=['blue']*5, opacity=0.5))
# Bars of the two traces sit next to each other
fig.update_layout(barmode='group')
fig.show()
In [14]:
# Rows -19..-15 of the ECE professor ranking, received vs. expected GPA.
# NOTE(review): the last 14 rows are skipped -- presumably entries without
# a usable GPA; confirm why this window was chosen.
temp = GPA_by_professor(ece)
low_rows = temp[-19:-14]
actual_gpa_prof = list(low_rows[:,1])
expected_gpa_prof = list(low_rows[:,2])

layout = go.Layout(yaxis=dict(range=[0,4]))

fig = go.Figure(layout = layout)
fig.add_trace(go.Bar(name='Actual GPA', x=list(low_rows[:,0]), y=actual_gpa_prof,
                     marker_color=['skyblue']*5, opacity=0.5))
fig.add_trace(go.Bar(name='Expected GPA', x=list(low_rows[:,0]), y=expected_gpa_prof,
                     marker_color=['blue']*5, opacity=0.5))
# Bars of the two traces sit next to each other
fig.update_layout(barmode='group')
fig.show()
In [15]:
# First five rows of the ECE course ranking (highest received GPA),
# shown as received vs. expected GPA side by side.
temp = GPA_best_course(ece)
best_rows = temp[:5]
actual_gpa_courses = list(best_rows[:,1])
expected_gpa_courses = list(best_rows[:,2])

layout = go.Layout(yaxis=dict(range=[2.5,4]))

fig = go.Figure(layout = layout)
fig.add_trace(go.Bar(name='Actual GPA', x=list(best_rows[:,0]), y=actual_gpa_courses,
                     marker_color=['skyblue']*5, opacity=0.5))
fig.add_trace(go.Bar(name='Expected GPA', x=list(best_rows[:,0]), y=expected_gpa_courses,
                     marker_color=['blue']*5, opacity=0.5))
# Bars of the two traces sit next to each other
fig.update_layout(barmode='group')
fig.show()
In [16]:
# First five rows of the ECE ranking by average time spent.
temp = GPA_best_time(ece)
time_span_ece = list(temp[:5,1])
layout = go.Layout(yaxis=dict(range=[12,14]))
# The bars show time spent, so the trace is labelled accordingly
# (it was previously named 'Actual GPA').
fig = go.Figure(data=[
    go.Bar(name='Average Time Spent', x= list(temp[:5,0]), y=time_span_ece)
],layout = layout)
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()
In [17]:
# Received and expected GPA of every ECE 101 review, plotted against the
# quarter column in the order the rows appear in ece.courses.
mask = np.where(ece.courses[:,0] == 'ECE 101')[0]
ece101 = ece.courses[mask]
ece101_quarter = ece101[:,3]
# np.float was removed from NumPy; the builtin float is the same dtype.
ece101_gpa = ece101[:,-1].astype(float)
ece101_gpa2 = ece101[:,-2].astype(float)

fig = go.Figure()
fig.add_trace(go.Scatter(x=ece101_quarter, y=ece101_gpa, name='ECE 101 actual GPA',
                         line=dict(color='blue', width=4)))
fig.add_trace(go.Scatter(x=ece101_quarter, y=ece101_gpa2, name='ECE 101 expected GPA',
                         line=dict(color='skyblue', width=4)))

fig.update_xaxes(title_text="quarter")
fig.update_yaxes(title_text="GPA")
In [18]:
# Average received GPA of each professor who has taught ECE 101,
# as rows [professor, average GPA] sorted from highest to lowest.
professor_set = set(ece101[:,2])
res = []
for p in professor_set:
    target = ece101[ece101[:,2] == p]
    res.append([p,avg_gpa(target[:,-1])])
res = np.array(res)
# np.float was removed from NumPy; the builtin float is the same dtype.
index = np.argsort(res[:,1].astype(float))[::-1]
res = res[index]
In [19]:
# Bar chart of the per-professor ECE 101 averages held in `res`.
names = res[:,0]
ece101_gpa = res[:,1]

# One colour value per bar, descending so the first (highest) bar gets the
# largest value on the 'blues' scale. Sized from the data instead of a
# fixed 12 so it always matches the number of bars.
cs = list(range(len(names),0,-1))

layout = go.Layout(yaxis=dict(range=[1.5,4]))

fig = go.Figure(data=[
    go.Bar(name='gpa', x=names, y=ece101_gpa,marker={'color':cs,'colorscale': 'blues'})
],layout=layout)
# Change the bar mode
fig.update_layout(barmode='overlay')
fig.update_layout(plot_bgcolor='whitesmoke')
fig.show()
In [20]:
def GPA_Matrix(department,
               years=('07','08','09','10','11','12','13','14','15','16','17','18','19')):
    """Build a course-by-year matrix of average received GPA.

    A review belongs to a year when its quarter string (column 3), with the
    first two characters removed, equals that year's two-digit code.

    Args:
        department: object whose ``courses`` array has the course number in
            column 0, the quarter in column 3 and the received GPA last.
        years: two-digit year codes, one matrix column each. Defaults to
            2007 through 2019.

    Returns:
        Tuple ``(courses, matrix)``: a 1-D array of course numbers in sorted
        order, and a 2-D float array whose row ``i`` holds the yearly
        averages of ``courses[i]`` (0.0 where a year has no review).
    """
    courses = department.courses
    # Fix the course order once so labels and matrix rows cannot drift apart.
    course_list = sorted(set(courses[:,0]))
    res = []
    for course in course_list:
        records = courses[courses[:,0] == course]
        suffixes = np.array([quarter[2:] for quarter in records[:,3]])
        grades = records[:,-1].astype(float)
        row = []
        for year in years:
            in_year = grades[suffixes == year]
            row.append(float(in_year.mean()) if in_year.size else 0.0)
        res.append(row)
    return np.array(course_list),np.array(res)
In [21]:
# Heatmap of yearly average GPA for a window of ECE courses, with courses
# ordered by the sum of their yearly averages (largest first).
courses,z = GPA_Matrix(ece)
order = np.argsort(np.sum(z,axis = 1))[::-1]
# Reorder labels together with the matrix rows; previously only z was
# sorted, so every row was shown under another course's label.
z = z[order]
courses = courses[order]

base = [2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019]

# Show the same ten rows of both arrays (ranks 20-29).
window = slice(20,30)

fig = go.Figure(data=go.Heatmap(
        z=z[window],
        x=base,
        y=courses[window],
        colorscale='blues'))

fig.update_layout(
    title='GPA ECE Courses',
    xaxis_nticks=20)

fig.show()
In [22]:
# Extract time spent (column 6) and received GPA (last column) of every CAPE
# review from the four departments, as float arrays of shape (n, 2).
# np.float was removed from NumPy; the builtin float is the same dtype.
ece_time = ece.courses[:,[6,-1]].astype(float)
cse_time = cse.courses[:,[6,-1]].astype(float)
math_time = math.courses[:,[6,-1]].astype(float)
cogs_time = cogs.courses[:,[6,-1]].astype(float)
In [23]:
# Wrap each (time, gpa) array in a DataFrame and tag every row with the
# lower-case name of its department.
dic_ece = dict(time=ece_time[:,0], gpa=ece_time[:,1], label=['ece'] * len(ece_time))
dic_cse = dict(time=cse_time[:,0], gpa=cse_time[:,1], label=['cse'] * len(cse_time))
dic_cogs = dict(time=cogs_time[:,0], gpa=cogs_time[:,1], label=['cogs'] * len(cogs_time))
dic_math = dict(time=math_time[:,0], gpa=math_time[:,1], label=['math'] * len(math_time))

df_ece = pd.DataFrame(dic_ece)
df_cse = pd.DataFrame(dic_cse)
df_cogs = pd.DataFrame(dic_cogs)
df_math = pd.DataFrame(dic_math)
In [24]:
# Stack the four per-department frames into a single frame; the 'label'
# column tells the departments apart. ignore_index is not set, so each
# frame keeps its own row index and index values repeat in df.
frames = [df_ece,df_cse,df_cogs,df_math]
df = pd.concat(frames)
In [25]:
# Stacked histogram of the 'time' column, one colour per department label.
histogram_options = dict(
    title='Hours spent per week',
    x='time',
    color="label",
    opacity=0.5,
    color_discrete_sequence=['honeydew','skyblue','blue','darkblue'],
    orientation='v',
)
fig = px.histogram(df, **histogram_options)
fig.update_layout(barmode='stack')

fig.show()
In [26]:
# Scatter of received GPA against time spent for every ECE review.
ece_points = go.Scatter(x=df_ece['time'], y=df_ece['gpa'], mode='markers', name='ece')
fig = go.Figure(data=ece_points)
fig.update_xaxes(title_text="time")
fig.update_yaxes(title_text="GPA")
fig.show()
In [27]:
from plotly.subplots import make_subplots

# One entry per panel of the 2x2 grid:
# (data frame, trace name, subplot title, grid row, grid column)
panels = [
    (df_ece, 'ece', "ECE Department", 1, 1),
    (df_math, 'math', "MATH Department", 1, 2),
    (df_cogs, 'cogs', "COGS Department", 2, 1),
    (df_cse, 'cse', "CSE Department", 2, 2),
]

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=tuple(panel[2] for panel in panels))

# GPA-vs-time scatter for each department in its own panel
for frame, trace_name, _, grid_row, grid_col in panels:
    fig.append_trace(
        go.Scatter(x=frame['time'], y=frame['gpa'], mode='markers', name=trace_name),
        row=grid_row, col=grid_col)

# Same axis titles and ranges on every panel so they can be compared
for _, _, _, grid_row, grid_col in panels:
    fig.update_xaxes(title_text="time", range=[0,20], row=grid_row, col=grid_col)

for _, _, _, grid_row, grid_col in panels:
    fig.update_yaxes(title_text="GPA", range=[1,4.5], row=grid_row, col=grid_col)


fig.show()
In [28]:
# For a hand-picked list of machine-learning courses, collect
# [course, average of the second-to-last column, average time spent].
# NOTE(review): the second-to-last column is the one other cells treat as
# the *expected* GPA -- confirm that is the intended column here.
ml_courses = ['ECE 175A','MATH 181D','CSE 152B', 'CSE 151A','CSE 184','CSE 158','COGS 185','COGS 188','COGS 118A','COGS 118B','COGS 9','COGS 108']
# Department prefix -> loaded department. The old test item[:3] == 'MATH'
# compared three characters with four and could never match, so MATH
# courses were silently looked up in the COGS data.
dept_by_prefix = {'ECE': ece, 'CSE': cse, 'MATH': math, 'COGS': cogs}
ml_courses_stat = []
for item in ml_courses:
    prefix = item.split(' ')[0]
    # Unknown prefixes fall back to COGS, as the final else branch did.
    dept = dept_by_prefix.get(prefix, cogs)
    temp = dept.courses[dept.courses[:,0] == item]
    row = [item]
    gpa = temp[:,-2]
    average_gpa = avg_gpa(gpa)
    row.append(average_gpa)
    time_span = temp[:,6]
    average_time = avg_gpa(time_span)
    row.append(average_time)

    ml_courses_stat.append(row)
ml_courses_stat = np.array(ml_courses_stat)
In [29]:
# Replace the statistics of MATH 181D and CSE 152B with their department's
# median GPA and median time. Rows are located by course name rather than
# by position, so re-running this cell after ml_courses_stat has been
# re-sorted still updates the right rows.
for ml_course, ml_dept in (('MATH 181D', math), ('CSE 152B', cse)):
    ml_row = ml_courses_stat[:,0] == ml_course
    ml_courses_stat[ml_row,1] = str(ml_dept.gpa_median)
    ml_courses_stat[ml_row,2] = str(ml_dept.time_median)
In [30]:
# Bar chart of the ML courses ordered by their GPA column, lowest first.
# The column holds numeric strings, so sort on the float values rather
# than lexicographically.
ml_courses_stat = ml_courses_stat[ml_courses_stat[:,1].astype(float).argsort()]
names = ml_courses_stat[:,0]
ml_courses_gpa = ml_courses_stat[:,1]
# One colour value per bar (was a fixed list of 20).
cs = list(range(len(names),0,-1))

layout = go.Layout(yaxis=dict(range=[2.5,4]))

# Pass the layout so the y-range defined above is actually applied, and
# plot numbers instead of strings.
fig = go.FigureWidget(data=[go.Bar(x=names, y=ml_courses_gpa.astype(float),
                                   marker={'color':cs,'colorscale': 'blues'})],
                      layout=layout)
fig.update_layout(barmode='overlay')
fig.update_layout(plot_bgcolor='whitesmoke')

fig.update_yaxes(title_text="GPA")

fig.show()
In [31]:
# Bar chart of the ML courses ordered by average time spent, largest first.
# Sort on float values: as strings, '9.5' would rank above '10.2'.
ml_courses_stat = ml_courses_stat[ml_courses_stat[:,2].astype(float).argsort()[::-1]]
# Read the labels AFTER sorting so each bar keeps its own course name
# (they were previously taken from the pre-sort order).
names = ml_courses_stat[:,0]
ml_courses_time = ml_courses_stat[:,2]

# One colour value per bar (was a fixed list of 20).
cs = list(range(len(names),0,-1))

fig = go.Figure(data=[
    go.Bar(name='time', x=names, y=ml_courses_time.astype(float),
           marker={'color':cs,'colorscale': 'blues'})
])
# Change the bar mode
fig.update_layout(barmode='overlay')
fig.update_layout(plot_bgcolor='whitesmoke')
# The bars show time spent, not GPA.
fig.update_yaxes(title_text="time")
fig.show()
In [32]:
# DataFrame of the ML courses: name, GPA column and time column as floats.
# np.float was removed from NumPy; the builtin float is the same dtype.
dic_ml = {'label' : ml_courses_stat[:,0],'gpa':ml_courses_stat[:,1].astype(float),
          'time':ml_courses_stat[:,2].astype(float)}
df_ml = pd.DataFrame(dic_ml)
In [33]:
# Labelled scatter of the ML courses: time on x, GPA on y, course name
# printed above each point.
fig = px.scatter(df_ml, x="time", y="gpa", text="label")
(fig.update_traces(textposition='top center')
    .update_xaxes(title_text="time")
    .update_yaxes(title_text="GPA")
    .update_layout(showlegend=True))
fig.show()
In [34]:
def TIME_by_professor(department):
    """Rank a department's professors by average time spent in their courses.

    Args:
        department: object with a ``professors`` iterable and a ``courses``
            array (professor in column 2, time spent in column 6).

    Returns:
        String array with rows ``[professor, avg time]`` sorted by time,
        largest first. Shape (0, 2) when the department has no professors.
    """
    courses = department.courses
    res = []
    for p in department.professors:
        taught = courses[courses[:,2] == p]
        # avg_gpa is a generic "mean of the non-zero entries" helper.
        res.append([p,avg_gpa(taught[:,6])])
    if not res:
        # Nothing to rank; column indexing below would fail on a 1-D array.
        return np.empty((0,2),dtype = str)
    res = np.array(res)
    index = np.argsort(res[:,1].astype(float))[::-1]
    return res[index]
In [35]:
# ECE professors with the five largest and five smallest average times.
temp = TIME_by_professor(ece)
avg_time = list(temp[:5,1])
avg_worst = list(temp[-5:,1])

layout = go.Layout(yaxis=dict(range=[0,20]))

# Each trace is annotated with its own values; the second trace previously
# reused avg_time, printing the wrong numbers on the 'Least Time Spent' bars.
fig = go.Figure(data=[
    go.Bar(name='Most Time Spent', x= list(temp[:5,0]), y=avg_time,marker_color=['blue']*5,opacity=0.5,text=avg_time),
    go.Bar(name='Least Time Spent', x= list(temp[-5:,0]), y=avg_worst,marker_color=['skyblue']*5,opacity=0.5,text=avg_worst),
],layout = layout)
# Change the bar mode
fig.update_layout(barmode='group')
fig.show()
In [ ]: